* ---- Session setup ----
* Start from an empty session, raise the variable limit (the script creates
* many thousands of per-class columns) and pin interpreter behaviour.
clear all
set maxvar 30000
version 14
capture log close
set more off

* User-written packages are installed into / loaded from the scratch directory.
sysdir set PLUS "S:/Scratch/u6031818/"
* gtools supplies -gegen-, used below. Install it only when it cannot be found,
* so re-runs neither need network access nor fail on an already-installed copy.
capture which gegen
if _rc ssc install gtools
****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
*cd  "Data\"
* Input/output folders (no trailing separator).
global MY_IN_PATH   "C:\Users\benjamin.balsmeier\Dropbox\RnD_tax_credit\Data"
global MY_OUT_PATH  "C:\Users\benjamin.balsmeier\Dropbox\RnD_tax_credit\Data"
*global MY_TEMP_PATH "..."

* Scratch folder for intermediate files (already ends with a separator).
global data "S:/Scratch/u6031818/"


* MY_OUT_PATH has no trailing separator, so one must be inserted here;
* otherwise the files end up next to the Data folder as "Dataout.dta" / "Datacr_tp.log".
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_tp.log"

log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import data
*************************************************************************************************

use ${MY_IN_PATH}/patents.dta, clear

* Build, for each class id 1..481, the number of rows per gvkey-ayear that
* carry that class id (h1..h481).  The 0/1 indicator c`k' is only a helper
* and is discarded as soon as its group total has been computed.
foreach k of numlist 1/481 {
	dis `k'
	gen c`k' = (uspcid == `k')
	gegen h`k' = sum(c`k'), by(gvkey ayear)
	drop c`k'
}

keep patent uspcid uspc ayear appl_dt gvkey h1-h481 m_kpss

* Checkpoint the patent-level file with the firm-year class totals attached.
save ${data}/patents_temp1.dta, replace
use ${data}/patents_temp1.dta, clear

*GVKEY by YEAR setup: keep a single row per gvkey-ayear
bysort gvkey ayear: gen n =_n
keep if n ==1
drop n

drop patent appl_dt m_kpss uspcid uspc
order gvkey ayear
sort gvkey ayear

* The per-class firm-year totals become c1..c481.
foreach num of numlist 1/481 {
	ren h`num' c`num'
}

* gen pat_stock_per class and gvkey:
* c`num'_cum5 = sum of c`num' over the firm's rows dated 1 to 5 years before ayear.
*
* The panel only contains the years present in the data, so the row j positions
* back is not necessarily j years back.  Rows are unique by gvkey-ayear, hence
* every row that lies within the previous five years is among the previous five
* rows; each of those is included whenever its year gap is between 1 and 5.
* (Requiring row offset == year offset would drop in-window rows whenever a
* firm has a gap year.)
forvalues j = 1/5 {
	* inrange() is 0 when the lagged row does not exist (missing gap).
	bysort gvkey (ayear): gen byte _lag`j'_in5 = inrange(ayear - ayear[_n-`j'], 1, 5)
}

foreach num of numlist 1/481 {
	gen c`num'_cum5 = 0
	forvalues j = 1/5 {
		bysort gvkey (ayear): replace c`num'_cum5 = c`num'_cum5 + c`num'[_n-`j'] if _lag`j'_in5
	}
	dis `num'
}

* Helper flags must not end up in the saved panel.
drop _lag?_in5

* NOTE: the former "test" loop compared c`num'_cum5 with c`num'_cum, which
* (through variable abbreviation) is the same variable, so it could only ever
* count zero rows; it has been removed.
save ${data}/patents_temp2.dta, replace

*****************************************************************************************************
	  
*MATCH THE PATENT.DTA uspc code number to the one used in the tech_proximity file
* Crosswalk: one row per (uspc, uspcid) pair found in the patent data.
use ${MY_IN_PATH}/patents.dta, clear
keep uspc uspcid
duplicates drop
* ignore("D") strips the character D before converting uspc to numeric.
destring uspc, ignore("D") replace
rename uspc nclass
* Saved in the scratch folder because that is where it is read back from below.
save  ${data}/patents_class_id2.dta, replace

*TECH PROX FILE
* Distinct class codes that appear as nclass1 in the proximity file.
use  ${data}/clas_prox_all_pairs_2.dta, clear
keep nclass1
rename nclass1 nclass
duplicates drop

* Attach uspcid; rows found only in the proximity file (_merge==1) are discarded.
joinby nclass using ${data}/patents_class_id2.dta, unm(b)
tab _m
drop if _m==1
drop _m	 
save  ${data}/patents_class_id_to_tech_match.dta, replace

*UPDATE THE TECH PROXIMITY MATRIX
* Attach the patent-data class id (uspcid) to both sides of every class pair;
* only pairs whose class code is found in the crosswalk survive each pass.
use  ${data}/clas_prox_all_pairs_2.dta, clear
forvalues side = 1/2 {
	rename nclass`side' nclass
	joinby nclass using ${data}/patents_class_id_to_tech_match.dta, unm(b)
	tab _m
	keep if _m==3
	drop _m
	rename nclass nclass`side'
	rename uspcid uspcid`side'
}
save  ${data}/class_prox_all_pairs_id_3.dta, replace

* Keep only the id-based keys, renamed to class1/class2.
drop nclass*
rename uspcid* class*
compress
sort class1 class2
*469 different classes
save ${data}/class_prox_all_pairs_id_4_class.dta, replace


use  ${data}/class_prox_all_pairs_id_4_class.dta, clear

* Median proximity: overall and within class1.
* (med_prox_class is created exactly once; a second -egen- with the same name
* would stop the script with "variable already defined".)
egen med_prox=median(class_prox)
bys class1: egen med_prox_class=median(class_prox)
drop if missing(class1)

* Mean proximity: overall and within class1.
egen mean_prox=mean(class_prox)
bys class1: egen mean_prox_class=mean(class_prox)

*top 25% 
egen top75_prox=pctile(class_prox), p(75)
bys class1: egen top75_prox_class=pctile(class_prox), p(75)

*top 10% 
egen top90_prox=pctile(class_prox), p(90)
bys class1: egen top90_prox_class=pctile(class_prox), p(90)

*Max by class, computed over pairs whose proximity is not exactly 1
* (rows with class_prox==1 get a missing max_prox_class)
bys class1: egen max_prox_class=max(class_prox) if class_prox!=1

* Indicators: pair lies above the median / at or above the 75th / 90th percentile,
* using the overall and the within-class1 cut-offs respectively.
gen high_prox=(class_prox>med_prox)
gen high_prox_class=(class_prox>med_prox_class)

gen high75_prox=(class_prox>=top75_prox)
gen high75_prox_class=(class_prox>=top75_prox_class)

gen high90_prox=(class_prox>=top90_prox)
gen high90_prox_class=(class_prox>=top90_prox_class)

* Pair attains the within-class1 maximum.
gen max_prox=(class_prox==max_prox_class)


save ${data}/prox_class_mean_med.dta, replace

keep class_prox class1 class2 high* max_prox
save ${data}/prox_class_mean_med_2.dta, replace



*************************************************
*CREATE GVKEY YEAR CLASS pair matched panel for all classes
* One row per gvkey-ayear-class1 with class1 = 1..481.
use ${MY_IN_PATH}/patents.dta, clear
keep gvkey ayear
duplicates drop
order gvkey ayear
sort  gvkey ayear
expand 481
bys gvkey ayear: gen class1=_n
compress
* Written to the scratch folder, which is where every later step reads it from.
save ${data}/patents_pairwise_class1.dta, replace

*SPLIT DATA INTO PIECES
* Number the distinct gvkeys and put them into consecutive groups of 100.
use ${data}/patents_pairwise_class1.dta, clear
keep gvkey
duplicates drop

gen id=_n

gen group=.
foreach i of numlist 1/80 {
	replace group=`i' if id>=`i'*100-99 & id<=`i'*100
}

* Only groups 1..78 are processed below and in the later steps; make it visible
* in the log if some gvkeys would be left out.
quietly count if missing(group) | group > 78
if r(N) > 0 {
	display as error "warning: " r(N) " gvkeys fall outside groups 1-78 and will not be processed"
}

save ${data}/patents_pairwise_class1_split.dta, replace

*DO BY GROUPS OF 100 GVKEYS
* For each group, expand every gvkey-ayear-class1 row to all class2 = 1..481.
foreach i of numlist 1/78 {
	use ${data}/patents_pairwise_class1_split.dta, clear
	drop id
	keep if group==`i' 
	drop group
	joinby gvkey using ${data}/patents_pairwise_class1.dta, unm(b)
	keep if _m==3
	drop _m
	* The expansion factor is given literally: the panel file carries no helper
	* variable holding it.
	expand 481
	bys gvkey ayear class1: gen class2=_n
	compress
	save ${data}/groups/patents_pairwise_class1_split_gr`i'.dta, replace
}


**********
*TRANSPOSE THE DATASET
* Wide c#/c#_cum5 columns become one row per gvkey-ayear-class with the
* variables c and cum5_c.
use ${data}/patents_temp2.dta, clear
rename *_cum5 cum5_*
reshape long c cum5_c, i(gvkey ayear) j(class)
compress
save ${data}/patents_temp3.dta, replace

* Continue with the data just saved: keep a row unless both the current count
* and the 5-year stock are zero.
keep if c != 0 | cum5_c != 0
save ${data}/patents_temp4.dta, replace

****************************************************************************************************
**# Bookmark #1
*Merge with the count of patents
* For every group file, attach the current count (c) and 5-year stock (cum5_c)
* of class1 and then of class2.  Pairs without a match in patents_temp4.dta
* (which holds no all-zero rows) get zeros.
foreach i of numlist 1/78 {
	use ${data}/groups/patents_pairwise_class1_split_gr`i'.dta, clear

	forvalues k = 1/2 {
		rename class`k' class
		joinby gvkey ayear class using ${data}/patents_temp4.dta, unm(b)
		tab _m
		* rows that exist only in the count file are not part of this group
		drop if _m==2
		drop _m
		rename class class`k'
		rename c c`k'
		rename cum5_c cum5_c`k'
		replace c`k'=0 if missing(c`k')
		replace cum5_c`k'=0 if missing(cum5_c`k')
	}
	compress

	* -replace- so that the loop can be re-run, as for every other save in this file
	save ${data}/groups1/patents_pairwise_class1_split_gr`i'_count.dta, replace
}

*Merge with the class proximity scores

* Proximity indicator in prox_class_mean_med_2.dta and, at the same position,
* the suffix used for the variables derived from it.
local prox_flags high_prox high_prox_class high75_prox high75_prox_class high90_prox high90_prox_class max_prox
local prox_tags  50 50_class 75 75_class 90 90_class _max

* All groups written by the count-merge step (1..78) are processed, including group 1.
foreach i of numlist 1/78 {
	use ${data}/groups1/patents_pairwise_class1_split_gr`i'_count.dta, clear
	*keep only new patents based on OLD definition
	keep if c1 > 0 & cum5_c1 ==0

	joinby class1 class2 using ${data}/prox_class_mean_med_2.dta, unm(b)
	keep if _m==3
	drop _m

	* "Not entirely new": class2 is flagged as close to class1 and the firm holds
	* a positive 5-year stock in class2.
	* (An earlier, disabled variant used (c2>0 | cum5_c2>0) instead of cum5_c2>0.)
	* count_not_new* = number of such class2 per gvkey-ayear-class1.
	forvalues j = 1/7 {
		local flag : word `j' of `prox_flags'
		local tag  : word `j' of `prox_tags'
		gen not_entirely_new`tag' = (`flag'==1 & cum5_c2>0)
		bys gvkey ayear class1: egen count_not_new`tag' = total(not_entirely_new`tag')
	}

	*sum count_not*
	/*  Output recorded from an earlier run:
	    Variable |        Obs        Mean    Std. dev.       Min        Max
	-------------+---------------------------------------------------------
	count_not~50 |  2,197,734    27.83525    26.86849          0        159
	cou~50_class |  2,197,734    25.25523    23.70084          0        145
	count_not~75 |  2,197,734    19.00149     18.0479          0        129
	coun~5_class |  2,197,734    16.34315    15.20461          0         92
	count_not~90 |  2,197,734    10.13231    10.25984          0         76
	-------------+---------------------------------------------------------
	cou~90_class |  2,197,734    8.169868     7.74055          0         41
	count_not_~x |  2,197,734    .2390098    .4264788          0          1
	*/

	* Collapse to one row per gvkey-ayear-class1 (the counts are constant within it).
	keep gvkey ayear class1 count_not* 
	duplicates drop
	compress
	save ${data}/group2/patents_pairwise_class1_split_gr`i'_count_notnew.dta, replace
}

*Merge
* Stack every .dta file found in the group2 folder into patent_notnew.dta.
cd  ${data}/group2

local statfiles : dir . files "*.dta"
clear
* A placeholder variable gives -append- a non-empty dataset to start from;
* it is removed again once all files are stacked.
gen blankvar=.
save  "${data}/patent_notnew.dta", replace
foreach piece of local statfiles {
    append using `piece', force
    * keep the on-disk copy in step with the data in memory
    save "${data}/patent_notnew.dta", replace
}

drop blankvar
compress
save "${data}/patent_notnew.dta", replace 
clear

*Turn every count_* variable into a 0/1 indicator new_count_* (1 if count>0)

use "${data}/patent_notnew.dta", clear
foreach cnt of varlist count* {
	gen new_`cnt' = (`cnt' > 0)
	drop `cnt'
}

save "${data}/patent_notnew_class_indicator.dta", replace


*TRANSPOSE TO MATCH ORIGINAL DATA
* Continue with the data just saved.  new_count_not_new<tag> becomes
* notnew<tag>_cl, and the reshape appends the class1 value to that stub.
rename new_count_not_new* notnew*_cl
reshape wide *_cl, i(gvkey ayear) j(class1)
save "${data}/patent_notnew_class_indicator_wide.dta", replace


*****************************************************************************************************
*****************************************************************************************************
*****************************************************************************************************
*****************************************************************************************************

* Attach the wide "not entirely new" indicators to the firm-year panel.
use ${data}/patents_temp2.dta, clear
joinby gvkey ayear using ${data}/patent_notnew_class_indicator_wide.dta, unm(b)
tab _m
drop _m

* Indicators without a value (no matching row / class) are set to 0.
foreach flag of varlist notnew* {
	replace `flag' = 0 if `flag' >= .
}
save ${data}/patents_temp2_notnew.dta, replace


use ${data}/patents_temp2_notnew.dta, clear

* Suffixes of the notnew<tag>_cl# indicators; the derived variables use the
* same suffix with "_max" written as "max" (new#_max, new#_sum_max).
local tags 50 50_class 75 75_class 90 90_class _max

*** gen dummy for entering new techclass:
*** patents in the class this year and a zero 5-year stock in that class
foreach num of numlist 1/481 {
	gen new`num' = (c`num' > 0 & c`num'_cum5 == 0)
}

*************************************
*Adjust the dummy for not actually entering, based on proximity

*GEN VARIABLES FOR MISSING CLASSES
* Create a zero indicator for every class/tag column that does not exist.
* -exact- prevents a missing column (e.g. ..._cl48) from being mistaken for a
* longer-named one (e.g. ..._cl481) through variable abbreviation.
foreach num of numlist 1/481 {
	foreach tag of local tags {
		capture confirm variable notnew`tag'_cl`num', exact
		if _rc gen notnew`tag'_cl`num' = 0
	}
}

* new#_<suffix> = entry dummy that additionally requires notnew<tag>_cl# == 0.
* Each variant's 481 columns are created consecutively (new1_x ... new481_x).
foreach tag of local tags {
	local sfx = subinstr("`tag'", "_max", "max", 1)
	foreach num of numlist 1/481 {
		gen new`num'_`sfx' = (c`num' > 0 & c`num'_cum5 == 0 & notnew`tag'_cl`num' == 0)
	}
}

*************************************
* Gen pats in new classes: the class count where the entry condition holds, else 0
foreach num of numlist 1/481 {
	gen new`num'_sum = cond(c`num' > 0 & c`num'_cum5 == 0, c`num', 0)
}


* Row totals over the per-class entry variables.
*   no_new_cl5[_x]    = number of classes entered        (sum of new#[_x])
*   no_patnew_cl5[_x] = number of patents in those classes (sum of new#_sum[_x])
* A row total does not depend on the year, so each total is computed in a
* single pass; the condition on ayear keeps rows outside 1913-2014 missing,
* as the former year-by-year loops did.

* Baseline (5-year stock only)
gen no_new_cl5 = .
gegen __rowtot = rowtotal(new1-new481) if inrange(ayear, 1913, 2014)
replace no_new_cl5 = __rowtot
drop __rowtot

gen no_patnew_cl5 = .
gegen __rowtot = rowtotal(new1_sum-new481_sum) if inrange(ayear, 1913, 2014)
replace no_patnew_cl5 = __rowtot
drop __rowtot

***************************************
*Adjusted new classes N patents
* Variants: 1) notnew50  2) notnew50_class  3) notnew75  4) notnew75_class
*           5) notnew90  6) notnew90_class  7) notnew_max
local tags 50 50_class 75 75_class 90 90_class _max

foreach tag of local tags {
	* variable suffix: identical to the tag, except "_max" -> "max"
	local sfx = subinstr("`tag'", "_max", "max", 1)
	dis "`sfx'"

	* patents in entered classes that are not flagged as close to the existing stock
	foreach num of numlist 1/481 {
		gen new`num'_sum_`sfx' = cond(c`num' > 0 & c`num'_cum5 == 0 & notnew`tag'_cl`num' == 0, c`num', 0)
	}

	gen no_new_cl5_`sfx' = .
	gegen __rowtot = rowtotal(new1_`sfx'-new481_`sfx') if inrange(ayear, 1913, 2014)
	replace no_new_cl5_`sfx' = __rowtot
	drop __rowtot

	gen no_patnew_cl5_`sfx' = .
	gegen __rowtot = rowtotal(new1_sum_`sfx'-new481_sum_`sfx') if inrange(ayear, 1913, 2014)
	replace no_patnew_cl5_`sfx' = __rowtot
	drop __rowtot
}

*****************************************
sum no_new_cl5* no_patnew_cl5* 
/*


    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
  no_new_cl5 |     78,280    2.337966    2.925028          0         61
no_new_cl~50 |     78,280    .3703628    .7840797          0         16
no_new_cl5.. |     78,280    .3726622     .787009          0         16
no_new_cl~75 |     78,280    .3893715    .7940326          0         16
no_n~5_class |     78,280    .3954394    .8010023          0         16
-------------+---------------------------------------------------------
no_new_cl~90 |     78,280    .4643204    .8292913          0         16
no_new_cl5.. |     78,280    .4681145    .8432388          0         16
no_new_cl5~x |     78,280     1.78255    2.079725          0         53
no_patnew~l5 |     78,280    3.028411    4.329277          0        161
no_patnew~50 |     78,280    .5144481    1.448149          0         69
-------------+---------------------------------------------------------
no_patnew_.. |     78,280    .5170797    1.450949          0         69
no_patnew~75 |     78,280    .5367782    1.458637          0         69
no_p~5_class |     78,280    .5443408    1.465949          0         69
no_patnew~90 |     78,280    .6250128     1.49356          0         69
no_patnew_.. |     78,280    .6301482    1.508003          0         69
-------------+---------------------------------------------------------
no_patnew_~x |     78,280    2.279254    3.127505          0        141

*/



* Checkpoint with all per-class helper columns still present.
save ${data}/techprox_temp4.dta, replace

* Remove the per-class entry dummies and patent counts, baseline first and
* then one variant at a time.
drop new1-new481 new1_sum-new481_sum
foreach v in 50 50_class 75 75_class 90 90_class max {
	drop new1_`v'-new481_`v' new1_sum_`v'-new481_sum_`v'
}


**********************************************************************************
******Technological Proximity
*Jaffe's technological proximity measure
*T_i_j = (Fi*Fj') / ( ((Fi*Fi')^0.5) * ((Fj*Fj')^0.5) )
* ==> T_i_it-1 = (Fi*Fit-1') / ( ((Fi*Fi')^0.5) * ((Fit-1*Fit-1')^0.5) )

*1. total 5-year patent stock across all classes
gegen pat_stock5 = rowtotal(c1_cum5-c481_cum5) 

*2. share of the stock held in each class (0 where the share is undefined)
foreach k of numlist 1/481 {
	gen pat_stock_fr_`k' = c`k'_cum5/pat_stock5
	replace pat_stock_fr_`k' = 0 if missing(pat_stock_fr_`k')
}

*3. number of patents in the current year
egen npat = rowtotal(c1-c481) 
sum npat

*** Calculate techprox
* Accumulators for the cross product and the two squared norms.
gen fi_fj = 0
gen fi_sq = 0
gen fj_sq = 0

*4. share of current-year patents in each class, added to the accumulators
foreach k of numlist 1/481 {
	dis `k'
	gen pat_fr_`k' = c`k'/npat
	replace fi_fj = fi_fj + (pat_fr_`k') * (pat_stock_fr_`k') 
	replace fi_sq = fi_sq + (pat_fr_`k')^2
	replace fj_sq = fj_sq + (pat_stock_fr_`k')^2
}

* Normalised cross product of the current and the stock class-share vectors.
gen T_i_j= fi_fj / sqrt(fi_sq*fj_sq)
gen tp_raw5 = T_i_j

*sum npat no_new_cl5 no_patnew_cl5 tp5 tp_raw5 pat_stock5

* Final variable names carry the suffix 2020.
ren ayear year
ren npat npat2020
ren tp_raw5 tp_raw5_2020 

* For the baseline ("base", no suffix) and each proximity-adjusted variant:
*   npat_new5<s>_2020    patents in newly entered classes
*   npat_old5<s>_2020    all other patents of the year
*   fr_npat_new5<s>_2020 share of patents in newly entered classes
*   no_new_cl5<s>_2020   number of newly entered classes
foreach v in base 50 50_class 75 75_class 90 90_class max {
	local s "_`v'"
	if "`v'" == "base" local s ""

	ren no_patnew_cl5`s' npat_new5`s'_2020
	gen npat_old5`s'_2020 = npat2020 - npat_new5`s'_2020
	gen fr_npat_new5`s'_2020 = npat_new5`s'_2020/npat2020 
	ren no_new_cl5`s' no_new_cl5`s'_2020
}

* Descriptives: the same five variables for every variant
* (plus the proximity measure for the baseline).
sum npat2020 npat_new5_2020 npat_old5_2020 fr_npat_new5_2020 tp_raw5_2020 no_new_cl5_2020
foreach v in 50 50_class 75 75_class 90 90_class max {
	sum npat2020 npat_new5_`v'_2020 npat_old5_`v'_2020 fr_npat_new5_`v'_2020 no_new_cl5_`v'_2020
}


destring gvkey, replace
keep gvkey year npat2020 npat_new5* npat_old5* fr_npat_new5* no_new_cl5*_2020  tp_raw5_2020 
save ${data}/techprox_newptas20221003_pdate.dta, replace
